Workflow: 1) Viability gating; 2) MiniBatch K-means clustering; 3) Annotating downsampled clusters with clustergrammer; 4) Mapping annotations back to single cells; 5) Generating figures: population frequencies and final UMAP.
# Initialise the empty dictionary that will hold every DataFrame used in this
# notebook, keyed by name (e.g. 'cell', 'meta_cell').
df = {}
#Import necessary packages
from glob import glob
import pandas as pd
import numpy as np
import FlowCal
from copy import deepcopy
import umap.umap_ as umap
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors
from matplotlib import colors
from matplotlib.colors import hex2color, rgb2hex
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from plotnine import *
from glob import glob
%matplotlib inline
# import boto3
# s3 = boto3.client('s3')
import sys
import importlib
from copy import deepcopy
# sys.path.append('../../template_notebooks/templates')
# import himc_helper_functions_v0_17_4 as hf
# importlib.reload(hf)
import warnings
warnings.filterwarnings('ignore')
from clustergrammer2 import Network, CGM2
from clustergrammer2 import net
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns
>> clustergrammer2 backend version 0.17.2
# Read the per-event marker table ('cell') and the per-event metadata table
# ('meta_cell') from their parquet files and register both in `df`.
cell_file_path = '/Users/daniel.geanon/OneDrive - Karolinska Institutet/Mac/Desktop/Python_Projects/210604_HD_test/HIMC03_200716_stain_RT_Fix_4_RT_clean.parquet'
meta_cell_file_path = '/Users/daniel.geanon/OneDrive - Karolinska Institutet/Mac/Desktop/Python_Projects/210604_HD_test/HIMC03_200716_stain_RT_Fix_4_RT_meta_cell.parquet'
df.update(
    cell=pd.read_parquet(cell_file_path),
    meta_cell=pd.read_parquet(meta_cell_file_path),
)
df['cell']
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 267.158783 | 0.934784 | 0.0 | 0.0 | 1.339464 | 121.233276 | 1.795951 | 0.000000 | 0.000000 | 194.949493 | ... | 1.262914 | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 71.808723 | 0.000000 | 0.0 | 0.0 | 0.000000 | 209.238647 | 2.176871 | 11.731404 | 0.274301 | 385.199280 | ... | 0.000000 | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 71.861137 | 0.000000 | 0.0 | 0.0 | 0.000000 | 140.409286 | 7.252010 | 0.000000 | 3.571679 | 254.056168 | ... | 0.000000 | 62.441502 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 230.573837 | 0.000000 | 0.0 | 0.0 | 0.000000 | 287.317047 | 5.349634 | 1.638112 | 1.603715 | 423.686249 | ... | 0.000000 | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 53.299320 | 0.000000 | 0.0 | 0.0 | 0.362298 | 196.420303 | 3.712311 | 1.096084 | 0.000000 | 236.201035 | ... | 0.000000 | 6.627503 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 176.676804 | 0.000000 | 0.0 | 0.0 | 0.000000 | 104.259087 | 4.877246 | 0.000000 | 0.000000 | 145.634796 | ... | 0.000000 | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 153.630463 | 0.000000 | 0.0 | 0.0 | 0.000000 | 154.507507 | 13.463334 | 0.958680 | 7.900064 | 236.639374 | ... | 0.000000 | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 55.318100 | 0.000000 | 0.0 | 0.0 | 0.000000 | 346.023499 | 10.585869 | 1.030947 | 0.000000 | 415.620636 | ... | 14.076699 | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 119.376610 | 0.000000 | 0.0 | 0.0 | 3.392067 | 309.526794 | 0.000000 | 0.491797 | 3.361438 | 505.561615 | ... | 3.168129 | 339.149170 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 213.778473 | 0.000000 | 0.0 | 0.0 | 0.000000 | 199.807922 | 10.064634 | 0.567633 | 0.000000 | 314.298676 | ... | 0.000000 | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 |
154559 rows × 72 columns
# Display the per-event metadata table.
df['meta_cell']
| Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y |
| T-1_HIMC03_200716_stain_RT_Fix_4_RT | 1.960900e+01 | 26.0 | 1156.135010 | 114.384003 | 92.160004 | 206.613998 | 0.617405 | 1.095147 | Pass | not-bead | n |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y |
160587 rows × 11 columns
For reasons not yet identified, 'meta_cell' contains more events than 'cell' (160,587 vs. 154,559 rows), so we drop the 'meta_cell' rows that have no matching entry in 'cell'. We can then verify that the 'cell' and 'meta_cell' indices match.
# Keep only the metadata rows whose cell ID is present in 'cell', in the same
# order as 'cell' (label-based lookup; a missing ID would raise KeyError).
df['meta_cell'] = df['meta_cell'].loc[df['cell'].index.tolist(), :]
df['meta_cell']
| Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y |
154559 rows × 11 columns
# Sanity check: both tables must list the same cell IDs in the same order.
# Index.equals compares the indices directly instead of materialising two
# Python lists of every cell ID.
df['cell'].index.equals(df['meta_cell'].index)
True
# Display the marker table as loaded, before the arcsinh transform is applied.
df['cell']
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 267.158783 | 0.934784 | 0.0 | 0.0 | 1.339464 | 121.233276 | 1.795951 | 0.000000 | 0.000000 | 194.949493 | ... | 1.262914 | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 71.808723 | 0.000000 | 0.0 | 0.0 | 0.000000 | 209.238647 | 2.176871 | 11.731404 | 0.274301 | 385.199280 | ... | 0.000000 | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 71.861137 | 0.000000 | 0.0 | 0.0 | 0.000000 | 140.409286 | 7.252010 | 0.000000 | 3.571679 | 254.056168 | ... | 0.000000 | 62.441502 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 230.573837 | 0.000000 | 0.0 | 0.0 | 0.000000 | 287.317047 | 5.349634 | 1.638112 | 1.603715 | 423.686249 | ... | 0.000000 | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 53.299320 | 0.000000 | 0.0 | 0.0 | 0.362298 | 196.420303 | 3.712311 | 1.096084 | 0.000000 | 236.201035 | ... | 0.000000 | 6.627503 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 176.676804 | 0.000000 | 0.0 | 0.0 | 0.000000 | 104.259087 | 4.877246 | 0.000000 | 0.000000 | 145.634796 | ... | 0.000000 | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 153.630463 | 0.000000 | 0.0 | 0.0 | 0.000000 | 154.507507 | 13.463334 | 0.958680 | 7.900064 | 236.639374 | ... | 0.000000 | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 55.318100 | 0.000000 | 0.0 | 0.0 | 0.000000 | 346.023499 | 10.585869 | 1.030947 | 0.000000 | 415.620636 | ... | 14.076699 | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 119.376610 | 0.000000 | 0.0 | 0.0 | 3.392067 | 309.526794 | 0.000000 | 0.491797 | 3.361438 | 505.561615 | ... | 3.168129 | 339.149170 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 213.778473 | 0.000000 | 0.0 | 0.0 | 0.000000 | 199.807922 | 10.064634 | 0.567633 | 0.000000 | 314.298676 | ... | 0.000000 | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 |
154559 rows × 72 columns
# Apply arcsinh(x / 5) in place to every column from '89Y_CD45' through
# '209Bi_CD11b' (label slices are inclusive at both ends).
# NOTE: the values are overwritten, so re-running this cell transforms the
# already-transformed data a second time.
marker_span = slice('89Y_CD45', '209Bi_CD11b')
df['cell'].loc[:, marker_span] = np.arcsinh(df['cell'].loc[:, marker_span] / 5)
df['cell']
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 4.671640 | 0.185884 | 0.0 | 0.0 | 0.264788 | 3.881851 | 0.351883 | 0.000000 | 0.000000 | 4.356614 | ... | 0.249971 | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 3.358925 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.427327 | 0.422676 | 1.588577 | 0.054833 | 5.037512 | ... | 0.000000 | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 3.359653 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.028588 | 1.166933 | 0.000000 | 0.664371 | 4.621361 | ... | 0.000000 | 3.219539 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 4.524398 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.744371 | 0.929965 | 0.322028 | 0.315484 | 5.132737 | ... | 0.000000 | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 3.061826 | 0.000000 | 0.0 | 0.0 | 0.072396 | 4.364128 | 0.687106 | 0.217498 | 0.000000 | 4.548505 | ... | 0.000000 | 1.093904 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 4.258232 | 0.000000 | 0.0 | 0.0 | 0.000000 | 3.731163 | 0.863907 | 0.000000 | 0.000000 | 4.065106 | ... | 0.000000 | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 4.118524 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.124214 | 1.716502 | 0.190580 | 1.238342 | 4.550358 | ... | 0.000000 | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 3.098846 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.930268 | 1.494842 | 0.204756 | 0.000000 | 5.113518 | ... | 1.758376 | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 3.866431 | 0.000000 | 0.0 | 0.0 | 0.634893 | 4.818819 | 0.000000 | 0.098201 | 0.629816 | 5.309404 | ... | 0.597444 | 4.910204 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 4.448786 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.381222 | 1.449402 | 0.113284 | 0.000000 | 4.834116 | ... | 0.000000 | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 |
154559 rows × 72 columns
Here, we'll use a Bokeh script written by Darwin to manually gate on live cells (103Rh-negative, 195Pt-negative).
from IPython.display import display, Markdown
# Coordinates for the gating scatter plot: 195Pt viability on the x-axis,
# 103Rh viability on the y-axis.
x = df['cell'].loc[:, '195Pt_Viability']
y = df['cell'].loc[:, '103Rh_Viability']
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models.sources import ColumnDataSource
from bokeh.models.callbacks import CustomJS
from bokeh.io import output_notebook, show
#Instructions: Gate on Rh103-Pt195- cells. The cells you gate on will be represented in the 'selected_indices' variable
%time
from random import random
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.plotting import figure, show
output_notebook()
# x = [random() for x in range(500)]
# y = [random() for y in range(500)]
s1 = ColumnDataSource(data=dict(x=x, y=y))
tools = ['box_select', 'lasso_select', 'reset']
p1 = figure(plot_width=400, plot_height=400,tools=tools, title="Select Here",output_backend="webgl",
x_axis_label='195Pt_Viability', y_axis_label='103Rh_Viability')
p1.circle('x', 'y', source=s1, alpha=0.5, size=2)
s2 = ColumnDataSource(data=dict(x=[], y=[]))
p2 = figure(plot_width=400, plot_height=400,x_range=(0, max(x)), y_range=(0, max(y)),
tools="", title="Watch Here", output_backend="webgl",
x_axis_label='195Pt_Viability', y_axis_label='103Rh_Viability')
p2.circle('x', 'y', source=s2, alpha=0.5, size=2)
s1.selected.js_on_change('indices', CustomJS(args=dict(s1=s1, s2=s2), code="""
var inds = cb_obj.indices;
var d1 = s1.data;
var d2 = s2.data;
d2['x'] = []
d2['y'] = []
for (var i = 0; i < inds.length; i++) {
d2['x'].push(d1['x'][inds[i]])
d2['y'].push(d1['y'][inds[i]])
}
s2.change.emit();
""")
)
callback = CustomJS(args=dict(s=s1),
code="""
console.log('Running CustomJS callback now.');
var indices = s.selected.indices;
var kernel = IPython.notebook.kernel;
kernel.execute("selected_indices = " + indices)
""")
# set the callback to run when a selection geometry event occurs in the figure
p1.js_on_event('selectiongeometry', callback)
# show(fig)
layout = row(p1, p2)
show(layout)
# Display the row positions captured by the gating callback (the variable is
# assigned from the browser via kernel.execute once a selection is made).
selected_indices
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 207, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 230, 231, 232, 233, 235, 236, 237, 238, 239, 240, 241, 243, 244, 246, 247, 248, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 289, 290, 291, 293, 294, 295, 297, 299, 301, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 318, 319, 320, 321, 322, 323, 324, 325, 327, 328, 329, 330, 331, 332, 333, 335, 336, 339, 340, 341, 342, 343, 344, 345, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 420, 421, 422, 423, 424, 425, 426, 427, 429, 430, 431, 432, 433, 434, 435, 437, 438, 439, 440, 441, 442, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 463, 464, 465, 
466, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 494, 495, 496, 497, 498, 499, 500, 501, 502, 504, 506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 532, 533, 534, 535, 536, 537, 538, 540, 541, 542, 543, 545, 546, 547, 548, 549, 550, 552, 553, 554, 555, 556, 557, 558, 559, 560, 562, 564, 566, 567, 568, 569, 570, 571, 572, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 590, 591, 592, 593, 594, 595, 596, 599, 600, 601, 602, 604, 605, 606, 607, 608, 609, 610, 611, 612, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 630, 631, 632, 633, 634, 635, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 665, 666, 667, 669, 670, 671, 672, 673, 674, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 780, 781, 783, 784, 785, 786, 788, 789, 790, 791, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 821, 822, 823, 824, 825, 826, 828, 829, 830, 831, 833, 834, 835, 836, 838, 839, 840, 841, 842, 843, 845, 846, 848, 849, 850, 851, 852, 853, 856, 857, 858, 859, 860, 861, 862, 863, 865, 866, 867, 868, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 
907, 908, 909, 910, 911, 912, 913, 914, 916, 917, 918, 919, 920, 921, 922, 923, 924, 926, 927, 928, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 993, 994, 995, 996, 997, 998, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1021, 1022, 1023, 1024, 1025, 1026, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1039, 1040, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1079, 1080, 1081, 1082, 1083, 1084, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, ...)
# Create the 'Viability' column in 'meta_cell', defaulting every event to 'Dead'.
df['meta_cell']['Viability'] = 'Dead'
# `selected_indices` holds row positions, not cell IDs. np.atleast_1d accepts a
# tuple, a list, or a single bare integer, so one gated event or an empty gate
# no longer breaks the positional lookup.
live_positions = np.atleast_1d(selected_indices).astype(int)
index_live = df['meta_cell'].iloc[live_positions].index.tolist()
# Events inside the gate are re-labelled 'Viability' = 'Live'.
df['meta_cell'].loc[index_live, 'Viability'] = 'Live'
df['meta_cell']['Viability'].value_counts()
Live 141429 Dead 13130 Name: Viability, dtype: int64
# Restrict both tables to the events labelled 'Live'; all downstream analysis
# uses these '_live' subsets.
df['meta_cell_live'] = df['meta_cell'][df['meta_cell']['Viability'].eq('Live')]
df['cell_live'] = df['cell'].loc[df['meta_cell_live'].index.tolist(), :]
df['meta_cell_live']
| Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live |
141429 rows × 12 columns
# Display the marker table restricted to live events.
df['cell_live']
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| T-0_HIMC03_200716_stain_RT_Fix_4_RT | 4.671640 | 0.185884 | 0.0 | 0.0 | 0.264788 | 3.881851 | 0.351883 | 0.000000 | 0.000000 | 4.356614 | ... | 0.249971 | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 |
| T-2_HIMC03_200716_stain_RT_Fix_4_RT | 3.358925 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.427327 | 0.422676 | 1.588577 | 0.054833 | 5.037512 | ... | 0.000000 | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 |
| T-3_HIMC03_200716_stain_RT_Fix_4_RT | 3.359653 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.028588 | 1.166933 | 0.000000 | 0.664371 | 4.621361 | ... | 0.000000 | 3.219539 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 |
| T-4_HIMC03_200716_stain_RT_Fix_4_RT | 4.524398 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.744371 | 0.929965 | 0.322028 | 0.315484 | 5.132737 | ... | 0.000000 | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 |
| T-5_HIMC03_200716_stain_RT_Fix_4_RT | 3.061826 | 0.000000 | 0.0 | 0.0 | 0.072396 | 4.364128 | 0.687106 | 0.217498 | 0.000000 | 4.548505 | ... | 0.000000 | 1.093904 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 4.258232 | 0.000000 | 0.0 | 0.0 | 0.000000 | 3.731163 | 0.863907 | 0.000000 | 0.000000 | 4.065106 | ... | 0.000000 | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 |
| T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 4.118524 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.124214 | 1.716502 | 0.190580 | 1.238342 | 4.550358 | ... | 0.000000 | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 |
| T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 3.098846 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.930268 | 1.494842 | 0.204756 | 0.000000 | 5.113518 | ... | 1.758376 | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 |
| T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 3.866431 | 0.000000 | 0.0 | 0.0 | 0.634893 | 4.818819 | 0.000000 | 0.098201 | 0.629816 | 5.309404 | ... | 0.597444 | 4.910204 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 |
| T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 4.448786 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.381222 | 1.449402 | 0.113284 | 0.000000 | 4.834116 | ... | 0.000000 | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 |
141429 rows × 72 columns
# Marker columns used as the clustering features for downsampling (LyoA3).
clustering_parameters = [
    '142Nd_CD19',
    '143Nd_CD45RA',
    '145Nd_CD4',
    '146Nd_CD8',
    '148Nd_CD16',
    '150Nd_CD1c',
    '151Eu_CD123',
    '152Sm_CD66b',
    '155Gd_CD27',
    '160Gd_CD14',
    '161Dy_CD56',
    '168Er_CD3',
    '170Er_CD38',
    '174Yb_HLADR',
]
from sklearn.cluster import MiniBatchKMeans
# MiniBatch K-means: partition the live events into 1000 clusters using only
# the selected marker columns (fixed random_state for repeatable labels).
kmeans = MiniBatchKMeans(n_clusters=1000, random_state=99)
kmeans.fit(df['cell_live'][clustering_parameters])
# Attach each event's cluster label to 'meta_cell_live' as column 'DS1000'.
# reset_index() moves the cell IDs into a regular column so that the rows line
# up positionally with the 0..n-1 index of the label Series.
# NOTE: re-running this cell resets the index again and appends a second
# 'DS1000' column.
ds_labels = pd.Series(kmeans.labels_, name='DS1000')
df['meta_cell_live'] = pd.concat([df['meta_cell_live'].reset_index(), ds_labels], axis=1)
df['meta_cell_live']
| index | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | DS1000 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live | 501 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live | 276 |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live | 963 |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live | 760 |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live | 304 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live | 771 |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live | 513 |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live | 628 |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live | 421 |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live | 361 |
141429 rows × 14 columns
# Add the same DS1000 cluster labels to the single-cell expression table.
# Labels are stored as strings here, so the per-cluster table derived from
# this frame ends up indexed by string cluster IDs.
cell_with_row_ids = df['cell_live'].reset_index()
ds1000_as_str = pd.Series(kmeans.labels_, name='DS1000').astype(str)
df['cell_live'] = pd.concat([cell_with_row_ids, ds1000_as_str], axis=1)
df['cell_live']
| index | 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | ... | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | DS1000 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 4.671640 | 0.185884 | 0.0 | 0.0 | 0.264788 | 3.881851 | 0.351883 | 0.000000 | 0.000000 | ... | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | 501 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 3.358925 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.427327 | 0.422676 | 1.588577 | 0.054833 | ... | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | 276 |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 3.359653 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.028588 | 1.166933 | 0.000000 | 0.664371 | ... | 3.219539 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | 963 |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 4.524398 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.744371 | 0.929965 | 0.322028 | 0.315484 | ... | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | 760 |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 3.061826 | 0.000000 | 0.0 | 0.0 | 0.072396 | 4.364128 | 0.687106 | 0.217498 | 0.000000 | ... | 1.093904 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | 304 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 4.258232 | 0.000000 | 0.0 | 0.0 | 0.000000 | 3.731163 | 0.863907 | 0.000000 | 0.000000 | ... | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | 771 |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 4.118524 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.124214 | 1.716502 | 0.190580 | 1.238342 | ... | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | 513 |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 3.098846 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.930268 | 1.494842 | 0.204756 | 0.000000 | ... | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | 628 |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 3.866431 | 0.000000 | 0.0 | 0.0 | 0.634893 | 4.818819 | 0.000000 | 0.098201 | 0.629816 | ... | 4.910204 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | 421 |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 4.448786 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.381222 | 1.449402 | 0.113284 | 0.000000 | ... | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | 361 |
141429 rows × 74 columns
# We want to annotate cells at the DS1000 level, so collapse the single cells
# to one row per downsample cluster: the mean of every numeric column within
# each DS1000 cluster.
# numeric_only=True explicitly excludes the string cell-ID column ('index')
# that reset_index() added to cell_live. Older pandas dropped it silently;
# pandas >= 2.0 raises a TypeError when asked to average a string column.
df['ds1000_cell'] = df['cell_live'].groupby(by=['DS1000']).mean(numeric_only=True)
df['ds1000_cell']
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DS1000 | |||||||||||||||||||||
| 0 | 4.700082 | 0.083234 | 0.003825 | 0.001458 | 0.340904 | 4.484458 | 0.907791 | 0.422260 | 0.304100 | 4.991335 | ... | 0.291628 | 0.072901 | 3.042326e+06 | 30.171171 | 1291.621685 | 100.395676 | 89.693072 | 134.113477 | 0.632188 | 2.872219 |
| 1 | 4.591762 | 0.067685 | 0.015159 | 0.010614 | 0.648820 | 4.682981 | 0.930157 | 0.430890 | 0.343951 | 5.181406 | ... | 0.416364 | 0.119810 | 3.439055e+06 | 30.054054 | 1293.332269 | 106.487157 | 95.022081 | 141.340043 | 0.669710 | 2.641501 |
| 10 | 4.434598 | 0.053697 | 0.005121 | 0.001623 | 0.543328 | 4.581661 | 0.867868 | 0.497445 | 0.369186 | 5.079806 | ... | 0.435914 | 1.149665 | 3.116999e+06 | 29.934211 | 1268.963264 | 104.855000 | 93.521776 | 138.982697 | 0.652753 | 2.594130 |
| 100 | 3.514340 | 0.081959 | 0.004233 | 0.011747 | 0.596421 | 4.635076 | 0.885786 | 0.493020 | 0.463627 | 5.138808 | ... | 0.634571 | 5.146466 | 2.850065e+06 | 28.100671 | 1186.528539 | 101.374047 | 84.627014 | 118.567913 | 0.658739 | 2.417530 |
| 101 | 4.440916 | 0.122186 | 0.005265 | 0.007005 | 0.639716 | 5.057707 | 1.143159 | 0.535157 | 0.442907 | 5.550769 | ... | 0.657316 | 0.719769 | 2.822885e+06 | 30.747664 | 1316.329322 | 106.653037 | 96.944215 | 136.182150 | 0.712640 | 3.689470 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 995 | 4.745540 | 0.087524 | 0.000000 | 0.000000 | 0.432743 | 4.567380 | 0.872447 | 0.521721 | 0.382871 | 5.030187 | ... | 0.296771 | 0.977440 | 2.987495e+06 | 35.150000 | 1482.882651 | 105.649750 | 108.200150 | 140.319599 | 0.648891 | 2.465232 |
| 996 | 4.551662 | 0.117476 | 0.012773 | 0.003607 | 1.088188 | 4.817651 | 0.991222 | 0.515233 | 0.395215 | 5.366480 | ... | 0.444482 | 0.107634 | 2.829871e+06 | 32.911111 | 1379.457690 | 105.113200 | 103.343666 | 148.722222 | 0.684395 | 2.932722 |
| 997 | 3.932637 | 0.082964 | 0.004977 | 0.023157 | 0.539717 | 4.821434 | 1.008463 | 0.535259 | 0.377502 | 5.363732 | ... | 0.480577 | 0.262682 | 2.922869e+06 | 31.877193 | 1384.485052 | 107.068526 | 101.020579 | 145.496526 | 0.684425 | 2.991945 |
| 998 | 4.099609 | 0.015141 | 0.003564 | 0.000000 | 0.359042 | 4.488890 | 0.829744 | 0.281583 | 0.323692 | 5.076166 | ... | 0.522082 | 0.270556 | 3.504561e+06 | 33.000000 | 1437.676098 | 102.229333 | 101.092952 | 152.403762 | 0.642790 | 3.162818 |
| 999 | 2.996914 | 0.134581 | 0.005998 | 0.000000 | 0.538851 | 4.701279 | 1.030591 | 0.428052 | 0.343690 | 5.203071 | ... | 0.643780 | 4.950984 | 2.808717e+06 | 29.232558 | 1239.436558 | 103.587395 | 90.009372 | 125.602442 | 0.674582 | 2.257728 |
998 rows × 72 columns
# Annotation table for the DS1000 clusters: one row per cluster, indexed by
# the cluster ID, with the ID repeated in a 'DS1000' column and an empty
# 'Tier1' column to be filled in during annotation.
df['ds1000_cell_annotations'] = pd.DataFrame(
    {'DS1000': df['ds1000_cell'].index.tolist(), 'Tier1': ''},
    index=df['ds1000_cell'].index.tolist(),
)
df['ds1000_cell_annotations']
| DS1000 | Tier1 | |
|---|---|---|
| 0 | 0 | |
| 1 | 1 | |
| 10 | 10 | |
| 100 | 100 | |
| 101 | 101 | |
| ... | ... | ... |
| 995 | 995 | |
| 996 | 996 | |
| 997 | 997 | |
| 998 | 998 | |
| 999 | 999 |
998 rows × 2 columns
Here, we will iteratively annotate the downsampled clusters in a tierwise fashion, increasing in granularity by tier
# In Tier1, we will focus on just the major broad immune subsets.
# This marker list is identical to clustering_parameters used for the K-means step.
Tier1_markers = [
'142Nd_CD19',
'143Nd_CD45RA',
'145Nd_CD4',
'146Nd_CD8',
'148Nd_CD16',
'150Nd_CD1c',
'151Eu_CD123',
'152Sm_CD66b',
'155Gd_CD27',
'160Gd_CD14',
'161Dy_CD56',
'168Er_CD3',
'170Er_CD38',
'174Yb_HLADR']
# Valid annotations = ['T Cell', 'Monocyte/DC', 'B Cell', 'NK Cell', 'Basophil', 'Neutrophil']
# Clustergrammer Tier 1
# Instructions: click on a cluster and manually type in annotation, whatever you type will be saved into df['ds1000_cell_annotations']['Tier1']
n1 = Network(CGM2)
# The matrix is transposed so markers are rows and DS1000 clusters are columns;
# the annotation table is passed as column metadata with 'Tier1' as the column category.
n1.load_df(df['ds1000_cell'][Tier1_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier1'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier1' as the category that is edited manually in the widget (see instructions above).
n1.set_manual_category(col='Tier1')
n1.widget()
# Number of DS1000 clusters assigned to each Tier1 label (run after annotating in the widget).
df['ds1000_cell_annotations']['Tier1'].value_counts()
T Cell 387 Monocyte/DC 269 B Cell 242 NK Cell 89 Basophil 8 Neutrophil 3 Name: Tier1, dtype: int64
For each Tier1 annotation, we will load a heatmap with just those clusters (e.g. only T cells), annotate those clusters more granularly, and save the result to Tier2.
# Create (or reset) the Tier2 annotation column.
# NOTE: re-running this line clears any Tier2 annotations already entered.
df['ds1000_cell_annotations']['Tier2'] = ''
# Tier2 T cell -- loading only the clusters where Tier1 == 'T Cell'
T_Cell_clusters = df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'T Cell'].index.tolist()
# Pull all matching cluster rows in a single label-based lookup instead of
# concatenating one row at a time (quadratic copying, and dependent on how
# pandas treats the empty seed frame when inferring dtypes).
# rename_axis(None) drops the 'DS1000' index name so the table looks the same
# as the one the concat loop used to produce.
T_Cell_cluster_list = df['ds1000_cell'].loc[T_Cell_clusters].rename_axis(None)
T_Cell_cluster_list
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4.700082 | 0.083234 | 0.003825 | 0.001458 | 0.340904 | 4.484458 | 0.907791 | 0.422260 | 0.304100 | 4.991335 | ... | 0.291628 | 0.072901 | 3.042326e+06 | 30.171171 | 1291.621685 | 100.395676 | 89.693072 | 134.113477 | 0.632188 | 2.872219 |
| 1 | 4.591762 | 0.067685 | 0.015159 | 0.010614 | 0.648820 | 4.682981 | 0.930157 | 0.430890 | 0.343951 | 5.181406 | ... | 0.416364 | 0.119810 | 3.439055e+06 | 30.054054 | 1293.332269 | 106.487157 | 95.022081 | 141.340043 | 0.669710 | 2.641501 |
| 10 | 4.434598 | 0.053697 | 0.005121 | 0.001623 | 0.543328 | 4.581661 | 0.867868 | 0.497445 | 0.369186 | 5.079806 | ... | 0.435914 | 1.149665 | 3.116999e+06 | 29.934211 | 1268.963264 | 104.855000 | 93.521776 | 138.982697 | 0.652753 | 2.594130 |
| 104 | 4.626679 | 0.080038 | 0.011891 | 0.007354 | 0.431803 | 4.582486 | 0.944829 | 0.381705 | 0.367501 | 5.095828 | ... | 0.344532 | 0.140389 | 2.732029e+06 | 28.872928 | 1244.203505 | 104.996751 | 89.807348 | 131.949470 | 0.647183 | 3.104855 |
| 109 | 4.697966 | 0.105461 | 0.014573 | 0.015318 | 0.472579 | 4.588917 | 0.907967 | 0.492001 | 0.346790 | 5.091223 | ... | 0.313424 | 0.547746 | 3.712789e+06 | 29.380952 | 1221.931202 | 99.181620 | 85.438905 | 131.032190 | 0.635369 | 3.340593 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 989 | 4.579085 | 0.091692 | 0.014480 | 0.010940 | 0.455677 | 4.544626 | 0.851459 | 0.398063 | 0.319297 | 5.062654 | ... | 0.358022 | 0.107709 | 2.956483e+06 | 29.677551 | 1268.896758 | 105.914304 | 93.506178 | 142.390127 | 0.650907 | 2.855573 |
| 993 | 4.171911 | 0.068962 | 0.007895 | 0.009033 | 0.276108 | 4.362679 | 0.715696 | 0.325493 | 0.296449 | 4.865373 | ... | 0.233901 | 0.083085 | 2.730289e+06 | 27.673729 | 1194.325293 | 103.837629 | 84.994903 | 121.741561 | 0.632924 | 2.986457 |
| 995 | 4.745540 | 0.087524 | 0.000000 | 0.000000 | 0.432743 | 4.567380 | 0.872447 | 0.521721 | 0.382871 | 5.030187 | ... | 0.296771 | 0.977440 | 2.987495e+06 | 35.150000 | 1482.882651 | 105.649750 | 108.200150 | 140.319599 | 0.648891 | 2.465232 |
| 996 | 4.551662 | 0.117476 | 0.012773 | 0.003607 | 1.088188 | 4.817651 | 0.991222 | 0.515233 | 0.395215 | 5.366480 | ... | 0.444482 | 0.107634 | 2.829871e+06 | 32.911111 | 1379.457690 | 105.113200 | 103.343666 | 148.722222 | 0.684395 | 2.932722 |
| 998 | 4.099609 | 0.015141 | 0.003564 | 0.000000 | 0.359042 | 4.488890 | 0.829744 | 0.281583 | 0.323692 | 5.076166 | ... | 0.522082 | 0.270556 | 3.504561e+06 | 33.000000 | 1437.676098 | 102.229333 | 101.092952 | 152.403762 | 0.642790 | 3.162818 |
387 rows × 72 columns
# Establishing markers to use for Tier2 T cell annotations
t_cell_markers = [
'143Nd_CD45RA',
'145Nd_CD4',
'146Nd_CD8',
'149Sm_CD127',
'153Eu_TIGIT',
'154Sm_ICOS',
'155Gd_CD27',
'161Dy_CD56',
'162Dy_TCRgd',
'163Dy_CD185',
'167Er_CCR7',
'168Er_CD3',
'169Tm_CD25',
'170Er_CD38',
'174Yb_HLADR',
'175Lu_CD279']
# Valid annotations = ['CD4+ Central Memory T Cell',
# 'CD4+ Naive T Cell',
# 'CD8+ Effector Memory T Cell',
# 'CD8+ Naive T Cell',
# 'CD4+ Effector Memory T Cell',
# 'GD T Cell',
# 'CD4-CD8- T Cell',
# 'CD8+ TEMRA',
# 'NKT Cell',
# 'CD4+ CD8+ T Cell',
# 'CD8+ Central Memory T Cell',
# 'CD4+ TEMRA']
# Clustergrammer Tier 2 (T cells): heatmap of the T cell clusters only
# (markers as rows, clusters as columns), with 'Tier2' as the column category.
n1 = Network(CGM2)
n1.load_df(T_Cell_cluster_list[t_cell_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier2'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier2' as the manually edited category; presumably, as in Tier 1,
# annotations typed in the widget are written to df['ds1000_cell_annotations']['Tier2'].
n1.set_manual_category(col='Tier2')
n1.widget()
# Number of T cell clusters per Tier2 label (run after annotating in the widget).
df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'T Cell']['Tier2'].value_counts()
CD4+ Central Memory T Cell 104 CD4+ Naive T Cell 70 CD8+ Effector Memory T Cell 58 CD8+ Naive T Cell 43 CD4+ Effector Memory T Cell 35 GD T Cell 33 CD4-CD8- T Cell 15 CD8+ TEMRA 13 NKT Cell 12 CD4+ CD8+ T Cell 4 Name: Tier2, dtype: int64
# Tier3 T cell: we will annotate activation status (i.e. annotate 'HLADR+' or 'CD38+', etc.)
# NOTE: the next line creates/resets the Tier3 column; re-running it clears existing Tier3 annotations.
df['ds1000_cell_annotations']['Tier3'] = ''
# Markers shown in the Tier3 heatmap (a subset of t_cell_markers).
t_cell_markers_3 = [
'153Eu_TIGIT',
'154Sm_ICOS',
'163Dy_CD185',
'170Er_CD38',
'174Yb_HLADR',
'175Lu_CD279']
# Clustergrammer Tier 3 (T cells): same T cell clusters, restricted to the Tier3 markers,
# with 'Tier3' as the column category.
n1 = Network(CGM2)
n1.load_df(T_Cell_cluster_list[t_cell_markers_3].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier3'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier3' as the manually edited category; presumably typed annotations
# are written to df['ds1000_cell_annotations']['Tier3'].
n1.set_manual_category(col='Tier3')
n1.widget()
# Number of T cell clusters per Tier3 label; the empty-string label counts clusters left unannotated.
df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] =='T Cell']['Tier3'].value_counts()
225 CD38+ 109 HLADR+ 26 CD38+ HLADR+ 25 PD-1+ TIGIT+ 2 Name: Tier3, dtype: int64
# View final T cell heatmap: T cell clusters over the Tier2 marker set,
# with all three annotation tiers passed as column categories.
# No manual category is set here, so this view is intended for inspection only.
n1 = Network(CGM2)
n1.load_df(T_Cell_cluster_list[t_cell_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier1','Tier2','Tier3'])
# n1.set_global_cat_colors(df['cell_types'])
# n1.set_manual_category(col='Tier3')
n1.widget()
# Annotate B cells
# Tier2 B cell -- loading only the clusters where Tier1 == 'B Cell'
B_Cell_clusters = df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'B Cell'].index.tolist()
# Select all matching cluster rows in one label-based lookup rather than
# growing a frame with pd.concat inside a loop (quadratic copying).
# rename_axis(None) drops the 'DS1000' index name to match the table the
# concat loop used to produce.
B_Cell_cluster_list = df['ds1000_cell'].loc[B_Cell_clusters].rename_axis(None)
B_Cell_cluster_list
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 120 | 4.560632 | 0.028672 | 0.034434 | 0.015482 | 0.401652 | 4.547425 | 0.772825 | 0.350141 | 0.615579 | 5.113514 | ... | 0.356416 | 0.188936 | 3.537979e+06 | 31.200000 | 1338.895264 | 106.120467 | 96.801332 | 116.338532 | 0.647636 | 2.998325 |
| 126 | 4.904915 | 0.063985 | 0.003773 | 0.003207 | 0.436320 | 4.738891 | 0.918678 | 0.449562 | 0.542047 | 5.276600 | ... | 0.375030 | 0.195892 | 3.190149e+06 | 33.652174 | 1409.365535 | 96.510405 | 94.755391 | 113.130971 | 0.674413 | 2.516351 |
| 129 | 5.127013 | 0.071947 | 0.026333 | 0.000000 | 0.714617 | 5.220159 | 1.543429 | 1.271481 | 0.732157 | 5.735044 | ... | 0.489827 | 0.270887 | 3.148000e+06 | 39.181818 | 1611.656988 | 98.531092 | 116.826453 | 151.112906 | 0.654352 | 4.647775 |
| 132 | 4.633403 | 0.085993 | 0.010019 | 0.007360 | 0.278977 | 4.513586 | 0.951620 | 0.369303 | 0.267373 | 5.021094 | ... | 0.441221 | 0.129529 | 3.372561e+06 | 33.493151 | 1396.476215 | 100.971767 | 99.300164 | 126.774876 | 0.631502 | 2.801053 |
| 135 | 4.399182 | 0.055255 | 0.010050 | 0.010919 | 0.300690 | 4.411779 | 0.813171 | 0.293092 | 0.341525 | 4.905985 | ... | 0.246600 | 0.125796 | 3.218109e+06 | 29.553763 | 1278.148948 | 104.776591 | 91.168183 | 122.613543 | 0.634365 | 2.810237 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99 | 4.376857 | 0.067276 | 0.009137 | 0.005160 | 0.202958 | 4.271998 | 0.693948 | 0.317235 | 0.272253 | 4.780977 | ... | 0.275503 | 0.118210 | 3.515197e+06 | 29.336182 | 1268.562352 | 105.849328 | 91.327476 | 125.552029 | 0.616781 | 2.977411 |
| 990 | 4.193011 | 0.088404 | 0.006144 | 0.010182 | 0.408652 | 4.446028 | 0.797168 | 0.452027 | 0.284520 | 4.972762 | ... | 0.399949 | 0.154807 | 3.290859e+06 | 30.404762 | 1308.348094 | 105.101278 | 94.183833 | 126.773270 | 0.636033 | 2.902944 |
| 992 | 4.257077 | 0.089389 | 0.000000 | 0.000000 | 0.303265 | 4.365450 | 0.761512 | 0.242810 | 0.304509 | 4.850489 | ... | 0.219424 | 0.083122 | 3.195488e+06 | 30.075472 | 1297.227286 | 103.338585 | 91.054887 | 127.075113 | 0.632185 | 2.750728 |
| 994 | 4.486584 | 0.114829 | 0.002278 | 0.003299 | 0.209231 | 4.288802 | 0.727500 | 0.333878 | 0.273143 | 4.808564 | ... | 0.280542 | 0.141121 | 3.303843e+06 | 30.437956 | 1311.672749 | 105.409949 | 94.148073 | 129.949533 | 0.608363 | 2.963339 |
| 997 | 3.932637 | 0.082964 | 0.004977 | 0.023157 | 0.539717 | 4.821434 | 1.008463 | 0.535259 | 0.377502 | 5.363732 | ... | 0.480577 | 0.262682 | 2.922869e+06 | 31.877193 | 1384.485052 | 107.068526 | 101.020579 | 145.496526 | 0.684425 | 2.991945 |
243 rows × 72 columns
# List every column of the per-cluster table, to pick marker names for the B cell panel below.
df['ds1000_cell'].columns.tolist()
['89Y_CD45', '90Zr', '93Nb', '102Pd', '103Rh_Viability', '104Pd_Barcode', '105Pd_Barcode', '106Pd_Barcode', '108Pd_Barcode', '110Pd_Barcode', '111Cd', '112Cd', '113In_CD57', '114Cd_B2M', '115In_CD11c', '116Cd_B2M', '127I', '140Ce', '141Pr_CD33', '142Nd_CD19', '143Nd_CD45RA', '144Nd_CD141', '145Nd_CD4', '146Nd_CD8', '147Sm_CD20_', '148Nd_CD16', '149Sm_CD127', '150Nd_CD1c', '151Eu_CD123', '152Sm_CD66b', '153Eu_TIGIT', '154Sm_ICOS', '155Gd_CD27', '156Gd_CD274', '157Gd', '158Gd_CD196', '159Tb_CD24', '160Gd_CD14', '161Dy_CD56', '162Dy_TCRgd', '163Dy_CD185', '164Dy_CCR10', '165Ho_CD294', '166Er_CD169', '167Er_CCR7', '168Er_CD3', '169Tm_CD25', '170Er_CD38', '171Yb_CD161', '172Yb_CD39', '173Yb_CXCR3', '174Yb_HLADR', '175Lu_CD279', '176Yb_CD194', '181Ta', '189Os', '191Ir_DNA', '192Os_Osmium', '193Ir_DNA', '194Pt', '195Pt_Viability', '196Pt', '198Pt', '209Bi_CD11b', 'Time', 'Event_length', 'Center', 'Offset', 'Width', 'Residual', 'bc_separation_dist', 'mahalanobis_dist']
# Markers shown in the Tier2 B cell heatmap.
b_cell_markers = ['115In_CD11c',
'142Nd_CD19',
'143Nd_CD45RA',
'147Sm_CD20_',
'155Gd_CD27',
'163Dy_CD185',
'170Er_CD38',
'172Yb_CD39',
'174Yb_HLADR']
#Valid annotations = ['Naive B Cell','Memory B Cell','Plasmablast']
# Clustergrammer Tier 2 (B cells): heatmap of the B cell clusters only
# (markers as rows, clusters as columns), with 'Tier2' as the column category.
n1 = Network(CGM2)
n1.load_df(B_Cell_cluster_list[b_cell_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier2'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier2' as the manually edited category; presumably typed annotations
# are written to df['ds1000_cell_annotations']['Tier2'].
n1.set_manual_category(col='Tier2')
n1.widget()
# Number of B cell clusters per Tier2 label; an empty-string label means a cluster was left unannotated.
df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'B Cell']['Tier2'].value_counts()
Naive B Cell 166
Memory B Cell 75
1
Plasmablast 1
Name: Tier2, dtype: int64
# Tier2 NK cell -- loading only the clusters where Tier1 == 'NK Cell'
NK_Cell_clusters = df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'NK Cell'].index.tolist()
# Select all matching cluster rows in one label-based lookup rather than
# growing a frame with pd.concat inside a loop (quadratic copying).
# rename_axis(None) drops the 'DS1000' index name to match the table the
# concat loop used to produce.
NK_Cell_cluster_list = df['ds1000_cell'].loc[NK_Cell_clusters].rename_axis(None)
NK_Cell_cluster_list
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 128 | 3.670771 | 0.056587 | 0.008929 | 0.000000 | 0.679111 | 4.658767 | 0.914266 | 0.460115 | 0.350665 | 5.195168 | ... | 0.421650 | 2.291917 | 3.248159e+06 | 29.494382 | 1268.427080 | 109.738416 | 95.977539 | 148.101528 | 0.664051 | 2.856221 |
| 134 | 3.699293 | 0.061935 | 0.000480 | 0.018516 | 0.469283 | 4.454700 | 0.726273 | 0.326015 | 0.355870 | 4.940048 | ... | 0.461571 | 2.143772 | 3.009227e+06 | 28.768293 | 1239.893628 | 111.079268 | 93.759427 | 136.821232 | 0.648494 | 2.772833 |
| 143 | 4.132936 | 0.121317 | 0.000000 | 0.072288 | 0.381220 | 4.771817 | 0.835068 | 0.752028 | 0.776899 | 5.284264 | ... | 0.622056 | 1.090778 | 3.314136e+06 | 32.333333 | 1527.835978 | 121.434331 | 116.605001 | 205.465665 | 0.660100 | 2.980600 |
| 151 | 2.898913 | 0.186142 | 0.000000 | 0.000000 | 0.446239 | 4.477918 | 0.888122 | 0.509072 | 0.301612 | 5.077932 | ... | 0.521675 | 1.211156 | 4.234478e+06 | 28.875000 | 1223.414749 | 106.462625 | 91.901124 | 146.797938 | 0.640070 | 2.542900 |
| 155 | 4.057487 | 0.026238 | 0.000000 | 0.000000 | 0.373822 | 4.530468 | 0.806315 | 0.477195 | 0.335697 | 5.059464 | ... | 0.495904 | 2.615620 | 2.689566e+06 | 30.416667 | 1345.294154 | 103.540667 | 92.856335 | 123.989168 | 0.648934 | 2.109529 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 930 | 3.535194 | 0.075773 | 0.015186 | 0.002612 | 1.086779 | 4.821739 | 0.952285 | 0.465389 | 0.446259 | 5.355195 | ... | 0.590835 | 2.109615 | 3.213992e+06 | 28.928571 | 1233.352685 | 110.993429 | 95.186905 | 146.202863 | 0.692576 | 2.984311 |
| 95 | 3.910763 | 0.047285 | 0.000000 | 0.009221 | 0.339663 | 4.466115 | 0.697826 | 0.373184 | 0.417306 | 4.935150 | ... | 0.342188 | 1.895184 | 2.947862e+06 | 28.169492 | 1229.049290 | 108.036661 | 90.572559 | 134.977830 | 0.645512 | 3.086529 |
| 955 | 3.528193 | 0.108484 | 0.002103 | 0.004946 | 1.038336 | 4.826519 | 0.965701 | 0.552395 | 0.329862 | 5.337544 | ... | 0.628141 | 2.076521 | 2.938843e+06 | 29.698020 | 1269.251279 | 112.911203 | 99.758831 | 152.887936 | 0.691522 | 2.836254 |
| 963 | 3.732434 | 0.080429 | 0.001812 | 0.007613 | 0.412950 | 4.548009 | 0.801500 | 0.337073 | 0.337084 | 5.050116 | ... | 0.393948 | 2.128251 | 3.292143e+06 | 28.233645 | 1218.201473 | 101.484687 | 84.187402 | 112.348696 | 0.659801 | 2.431688 |
| 975 | 3.348496 | 0.060802 | 0.001212 | 0.004227 | 0.469498 | 4.462860 | 0.745479 | 0.408833 | 0.327075 | 4.971698 | ... | 0.364441 | 2.008762 | 3.135354e+06 | 28.225000 | 1224.361722 | 110.579186 | 92.071839 | 133.884186 | 0.645089 | 2.370246 |
89 rows × 72 columns
# Markers shown in the Tier2 NK cell heatmap.
NK_cell_markers = ['113In_CD57',
'144Nd_CD141',
'145Nd_CD4',
'146Nd_CD8',
'148Nd_CD16',
'155Gd_CD27',
'161Dy_CD56',
'168Er_CD3',
'170Er_CD38',
'174Yb_HLADR']
# Valid annotations = ['NK Cell CD56hi CD16-', 'NK Cell CD56lo CD16hi']
# Clustergrammer Tier 2 (NK cells): heatmap of the NK cell clusters only
# (markers as rows, clusters as columns), with 'Tier2' as the column category.
n1 = Network(CGM2)
n1.load_df(NK_Cell_cluster_list[NK_cell_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier2'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier2' as the manually edited category; presumably typed annotations
# are written to df['ds1000_cell_annotations']['Tier2'].
n1.set_manual_category(col='Tier2')
n1.widget()
# Tier2 Monocytes/DCs -- loading only the clusters where Tier1 == 'Monocyte/DC'
M_DC_clusters = df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'Monocyte/DC'].index.tolist()
# Select all matching cluster rows in one label-based lookup rather than
# growing a frame with pd.concat inside a loop (quadratic copying).
# rename_axis(None) drops the 'DS1000' index name to match the table the
# concat loop used to produce.
M_DC_Cell_cluster_list = df['ds1000_cell'].loc[M_DC_clusters].rename_axis(None)
M_DC_Cell_cluster_list
| 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | 110Pd_Barcode | ... | 198Pt | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 100 | 3.514340 | 0.081959 | 0.004233 | 0.011747 | 0.596421 | 4.635076 | 0.885786 | 0.493020 | 0.463627 | 5.138808 | ... | 0.634571 | 5.146466 | 2.850065e+06 | 28.100671 | 1186.528539 | 101.374047 | 84.627014 | 118.567913 | 0.658739 | 2.417530 |
| 101 | 4.440916 | 0.122186 | 0.005265 | 0.007005 | 0.639716 | 5.057707 | 1.143159 | 0.535157 | 0.442907 | 5.550769 | ... | 0.657316 | 0.719769 | 2.822885e+06 | 30.747664 | 1316.329322 | 106.653037 | 96.944215 | 136.182150 | 0.712640 | 3.689470 |
| 102 | 2.956316 | 0.060958 | 0.000000 | 0.049767 | 0.342149 | 4.896584 | 0.719570 | 0.279173 | 0.667897 | 5.450343 | ... | 0.213531 | 0.000000 | 2.647582e+06 | 29.800000 | 1319.748022 | 101.436200 | 88.895399 | 118.481398 | 0.728010 | 2.473015 |
| 103 | 3.257377 | 0.067242 | 0.003175 | 0.019237 | 0.700570 | 5.046454 | 1.198784 | 0.606672 | 0.618618 | 5.564250 | ... | 0.690572 | 0.138968 | 2.885165e+06 | 28.464789 | 1224.541094 | 109.349570 | 92.563303 | 141.370585 | 0.702789 | 4.063311 |
| 105 | 3.775092 | 0.095017 | 0.000000 | 0.000000 | 0.347643 | 4.595721 | 0.808683 | 0.489672 | 0.459022 | 5.091970 | ... | 0.492299 | 5.524245 | 3.232101e+06 | 30.140351 | 1306.614645 | 102.356158 | 91.061649 | 123.306245 | 0.656837 | 2.959408 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 98 | 4.207768 | 0.079629 | 0.007084 | 0.004805 | 0.498442 | 4.656467 | 0.881649 | 0.453177 | 0.437137 | 5.162063 | ... | 0.562753 | 5.137689 | 2.848931e+06 | 28.940541 | 1225.326338 | 101.786595 | 87.018592 | 115.658368 | 0.664800 | 2.621408 |
| 982 | 4.323494 | 0.159163 | 0.000000 | 0.000000 | 0.403773 | 4.754288 | 1.015481 | 0.398462 | 0.508891 | 5.256993 | ... | 0.573821 | 5.230655 | 3.149171e+06 | 33.266667 | 1404.559570 | 101.805500 | 99.729633 | 131.884933 | 0.668583 | 2.671545 |
| 985 | 4.226214 | 0.164611 | 0.008533 | 0.000000 | 0.376041 | 4.490658 | 0.811031 | 0.508416 | 0.389319 | 5.018059 | ... | 0.361075 | 5.818066 | 3.247636e+06 | 31.578947 | 1336.508198 | 96.570158 | 90.967053 | 120.146632 | 0.637949 | 2.126503 |
| 991 | 4.845125 | 0.078390 | 0.012441 | 0.000357 | 0.654554 | 4.825347 | 1.025155 | 0.501071 | 0.537594 | 5.359816 | ... | 0.479143 | 4.932470 | 2.948034e+06 | 31.291667 | 1327.607026 | 97.086479 | 90.010292 | 122.870146 | 0.673908 | 2.512897 |
| 999 | 2.996914 | 0.134581 | 0.005998 | 0.000000 | 0.538851 | 4.701279 | 1.030591 | 0.428052 | 0.343690 | 5.203071 | ... | 0.643780 | 4.950984 | 2.808717e+06 | 29.232558 | 1239.436558 | 103.587395 | 90.009372 | 125.602442 | 0.674582 | 2.257728 |
268 rows × 72 columns
# Markers shown in the Tier2 Monocyte/DC heatmap.
M_DC_markers = [
'115In_CD11c',
'143Nd_CD45RA',
'145Nd_CD4',
'148Nd_CD16',
'150Nd_CD1c',
'151Eu_CD123',
'152Sm_CD66b',
'160Gd_CD14',
'161Dy_CD56',
'166Er_CD169',
'171Yb_CD161',
'170Er_CD38',
'172Yb_CD39',
'174Yb_HLADR',
'209Bi_CD11b']
#Valid annotations = ['CD14+ Monocyte',
# 'CD14+ CD16+ Monocyte',
# 'cDC',
# 'CD14-CD16-CD1c-CD123-',
# 'pDC',
# 'CD16+ Monocyte']
# Clustergrammer Tier 2 (Monocytes/DCs): heatmap of the Monocyte/DC clusters only
# (markers as rows, clusters as columns), with 'Tier2' as the column category.
n1 = Network(CGM2)
n1.load_df(M_DC_Cell_cluster_list[M_DC_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier2'])
# n1.set_global_cat_colors(df['cell_types'])
# Select 'Tier2' as the manually edited category; presumably typed annotations
# are written to df['ds1000_cell_annotations']['Tier2'].
n1.set_manual_category(col='Tier2')
n1.widget()
# Distinct Tier2 labels used for the Monocyte/DC clusters, most frequent first.
df['ds1000_cell_annotations'].loc[df['ds1000_cell_annotations']['Tier1'] == 'Monocyte/DC']['Tier2'].value_counts().index.tolist()
['CD14+ Monocyte', 'CD14+ CD16+ Monocyte', 'cDC', 'CD14-CD16-CD1c-CD123-', 'pDC', 'CD16+ Monocyte']
# List every column of the per-cluster table, to pick marker names for the global heatmap below.
df['ds1000_cell'].columns.to_list()
['89Y_CD45', '90Zr', '93Nb', '102Pd', '103Rh_Viability', '104Pd_Barcode', '105Pd_Barcode', '106Pd_Barcode', '108Pd_Barcode', '110Pd_Barcode', '111Cd', '112Cd', '113In_CD57', '114Cd_B2M', '115In_CD11c', '116Cd_B2M', '127I', '140Ce', '141Pr_CD33', '142Nd_CD19', '143Nd_CD45RA', '144Nd_CD141', '145Nd_CD4', '146Nd_CD8', '147Sm_CD20_', '148Nd_CD16', '149Sm_CD127', '150Nd_CD1c', '151Eu_CD123', '152Sm_CD66b', '153Eu_TIGIT', '154Sm_ICOS', '155Gd_CD27', '156Gd_CD274', '157Gd', '158Gd_CD196', '159Tb_CD24', '160Gd_CD14', '161Dy_CD56', '162Dy_TCRgd', '163Dy_CD185', '164Dy_CCR10', '165Ho_CD294', '166Er_CD169', '167Er_CCR7', '168Er_CD3', '169Tm_CD25', '170Er_CD38', '171Yb_CD161', '172Yb_CD39', '173Yb_CXCR3', '174Yb_HLADR', '175Lu_CD279', '176Yb_CD194', '181Ta', '189Os', '191Ir_DNA', '192Os_Osmium', '193Ir_DNA', '194Pt', '195Pt_Viability', '196Pt', '198Pt', '209Bi_CD11b', 'Time', 'Event_length', 'Center', 'Offset', 'Width', 'Residual', 'bc_separation_dist', 'mahalanobis_dist']
# Markers shown in the global heatmap of all DS1000 clusters.
all_markers = [
    '89Y_CD45', '113In_CD57', '115In_CD11c', '141Pr_CD33',
    '142Nd_CD19', '143Nd_CD45RA', '144Nd_CD141', '145Nd_CD4',
    '146Nd_CD8', '147Sm_CD20_', '148Nd_CD16', '149Sm_CD127',
    '150Nd_CD1c', '151Eu_CD123', '152Sm_CD66b', '153Eu_TIGIT',
    '154Sm_ICOS', '155Gd_CD27', '156Gd_CD274', '158Gd_CD196',
    '159Tb_CD24', '160Gd_CD14', '161Dy_CD56', '162Dy_TCRgd',
    '163Dy_CD185', '164Dy_CCR10', '165Ho_CD294', '166Er_CD169',
    '167Er_CCR7', '168Er_CD3', '169Tm_CD25', '170Er_CD38',
    '171Yb_CD161', '172Yb_CD39', '173Yb_CXCR3', '174Yb_HLADR',
    '175Lu_CD279', '176Yb_CD194', '209Bi_CD11b',
]
# Viewing global heatmap: all DS1000 clusters over all_markers
# (markers as rows, clusters as columns), with all three annotation tiers
# passed as column categories. No manual category is set, so this view is for inspection only.
n1 = Network(CGM2)
n1.load_df(df['ds1000_cell'][all_markers].transpose(), meta_col=df['ds1000_cell_annotations'],col_cats = ['Tier1','Tier2','Tier3'])
# n1.set_global_cat_colors(df['cell_types'])
# n1.set_manual_category(col='Tier2')
n1.widget()
# View final annotations (one row per DS1000 cluster, with Tier1/Tier2/Tier3 labels)
df['ds1000_cell_annotations']
| DS1000 | Tier1 | Tier2 | Tier3 | |
|---|---|---|---|---|
| 0 | 0 | T Cell | CD4+ Central Memory T Cell | |
| 1 | 1 | T Cell | CD4+ Central Memory T Cell | |
| 10 | 10 | T Cell | GD T Cell | |
| 100 | 100 | Monocyte/DC | CD14+ Monocyte | |
| 101 | 101 | Monocyte/DC | CD16+ Monocyte | |
| ... | ... | ... | ... | ... |
| 995 | 995 | T Cell | CD8+ Effector Memory T Cell | |
| 996 | 996 | T Cell | CD4+ Effector Memory T Cell | CD38+ HLADR+ |
| 997 | 997 | B Cell | Memory B Cell | |
| 998 | 998 | T Cell | CD8+ Naive T Cell | HLADR+ |
| 999 | 999 | Monocyte/DC | CD14+ Monocyte |
998 rows × 4 columns
# Inspect the single-cell metadata (including its DS1000 column) before mapping the cluster annotations onto it.
df['meta_cell_live']
| index | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | DS1000 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live | 501 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live | 276 |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live | 963 |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live | 760 |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live | 304 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live | 771 |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live | 513 |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live | 628 |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live | 421 |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live | 361 |
141429 rows × 14 columns
#Create the Tier1 column on the per-cell metadata
#DS1000 cluster IDs are cast to str, presumably so they match the labels of the annotation index
df['meta_cell_live']['DS1000'] = df['meta_cell_live']['DS1000'].astype(str)
ds1000 = df['ds1000_cell_annotations'].index.tolist()
#Map Tier1 from each DS1000 cluster to its single cells with one vectorised lookup
#(rather than one full-column boolean scan per cluster). Cells whose cluster ID is
#not in the annotation index keep the '' placeholder.
cluster_ids = df['meta_cell_live']['DS1000']
tier1_by_cluster = df['ds1000_cell_annotations']['Tier1']
df['meta_cell_live']['Tier1'] = cluster_ids.map(tier1_by_cluster).where(cluster_ids.isin(ds1000), '')
df['meta_cell_live']['Tier1'].value_counts()
T Cell 66497 Monocyte/DC 33210 B Cell 31591 NK Cell 8111 Basophil 1868 Neutrophil 152 Name: Tier1, dtype: int64
#Create the Tier2 column on the per-cell metadata
#Map Tier2 from each DS1000 cluster to its single cells with one vectorised lookup
#(rather than one full-column boolean scan per cluster). Cells whose cluster ID is
#not in the annotation index keep the '' placeholder.
cluster_ids = df['meta_cell_live']['DS1000']
cluster_annotations = df['ds1000_cell_annotations']
df['meta_cell_live']['Tier2'] = cluster_ids.map(cluster_annotations['Tier2']).where(
    cluster_ids.isin(cluster_annotations.index), '')
df['meta_cell_live']['Tier2'].value_counts()
CD14+ Monocyte 30296
CD4+ Central Memory T Cell 26869
Naive B Cell 24442
CD4+ Naive T Cell 12338
CD8+ Naive T Cell 8714
CD8+ Effector Memory T Cell 8465
NK Cell CD56lo CD16hi 7352
Memory B Cell 6947
CD4+ Effector Memory T Cell 4503
GD T Cell 2832
2313
NKT Cell 923
CD4-CD8- T Cell 901
cDC 866
CD8+ TEMRA 812
pDC 699
NK Cell CD56hi CD16- 666
CD14+ CD16+ Monocyte 642
CD14-CD16-CD1c-CD123- 464
CD16+ Monocyte 243
CD4+ CD8+ T Cell 140
Plasmablast 2
Name: Tier2, dtype: int64
#Create the Tier3 column on the per-cell metadata
#Map Tier3 from each DS1000 cluster to its single cells with one vectorised lookup
#(rather than one full-column boolean scan per cluster). Cells whose cluster ID is
#not in the annotation index keep the '' placeholder.
cluster_ids = df['meta_cell_live']['DS1000']
cluster_annotations = df['ds1000_cell_annotations']
df['meta_cell_live']['Tier3'] = cluster_ids.map(cluster_annotations['Tier3']).where(
    cluster_ids.isin(cluster_annotations.index), '')
df['meta_cell_live']['Tier3'].value_counts()
116114 CD38+ 21857 HLADR+ 1962 CD38+ HLADR+ 1492 PD-1+ TIGIT+ 4 Name: Tier3, dtype: int64
# View the final meta_cell_live dataframe, which now carries the Tier1/Tier2/Tier3 annotation columns
df['meta_cell_live']
| index | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | DS1000 | Tier1 | Tier2 | Tier3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live | 501 | T Cell | CD4+ Naive T Cell | CD38+ |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live | 276 | B Cell | Memory B Cell | |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live | 963 | NK Cell | NK Cell CD56lo CD16hi | |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live | 760 | T Cell | CD4+ Central Memory T Cell | |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live | 304 | T Cell | CD4+ Central Memory T Cell | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live | 771 | B Cell | Naive B Cell | |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live | 513 | T Cell | CD4-CD8- T Cell | |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live | 628 | T Cell | CD4+ Effector Memory T Cell | |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live | 421 | Monocyte/DC | CD14+ Monocyte | |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live | 361 | B Cell | Naive B Cell |
141429 rows × 17 columns
#Count cells per Tier1 population and keep the result as a plain {population: count} dict
tier1_counts = df['meta_cell_live']['Tier1'].value_counts()
final_cell_counts = tier1_counts.to_dict()
final_cell_counts
{'T Cell': 66497,
'Monocyte/DC': 33210,
'B Cell': 31591,
'NK Cell': 8111,
'Basophil': 1868,
'Neutrophil': 152}
#Express each Tier1 count as a percentage of all rows in meta_cell_live
n_live_cells = len(df['meta_cell_live'])
final_cell_counts_freq = {
    population: count / n_live_cells * 100
    for population, count in final_cell_counts.items()
}
final_cell_counts_freq
{'T Cell': 47.01793832947981,
'Monocyte/DC': 23.481747025009014,
'B Cell': 22.33700301918277,
'NK Cell': 5.735033126162244,
'Basophil': 1.3208040783714796,
'Neutrophil': 0.10747442179468143}
#Population names, in the dict's insertion order
keys = [*final_cell_counts_freq]
keys
['T Cell', 'Monocyte/DC', 'B Cell', 'NK Cell', 'Basophil', 'Neutrophil']
#Percentages listed in the same order as `keys`
freq_list = [final_cell_counts_freq[population] for population in keys]
freq_list
[47.01793832947981, 23.481747025009014, 22.33700301918277, 5.735033126162244, 1.3208040783714796, 0.10747442179468143]
#Coerce every percentage to a built-in float
freq_list_float = list(map(float, freq_list))
freq_list_float
[47.01793832947981, 23.481747025009014, 22.33700301918277, 5.735033126162244, 1.3208040783714796, 0.10747442179468143]
#Pie chart of the Tier1 population frequencies
#One offset per slice, so the chart keeps working if the number of populations changes
explode = (0.1,) * len(freq_list_float)
fig1, ax1 = plt.subplots()
ax1.pie(freq_list_float, labels=keys, explode=explode, autopct='%1.1f%%', pctdistance=1.5, labeldistance=2)
ax1.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()
#UMAP
#Columns that are selected below as the input features for the UMAP embedding
clustering_parameters
['142Nd_CD19', '143Nd_CD45RA', '145Nd_CD4', '146Nd_CD8', '148Nd_CD16', '150Nd_CD1c', '151Eu_CD123', '152Sm_CD66b', '155Gd_CD27', '160Gd_CD14', '161Dy_CD56', '168Er_CD3', '170Er_CD38', '174Yb_HLADR']
#Keep only the clustering_parameters columns of df['cell_live']; this is the input to the UMAP embedding
df_cluster_params = df['cell_live'][clustering_parameters]
import umap.umap_ as umap
def make_umap_coordinates(df_cluster_params, n_neighbors=20, min_dist=0.1,
                          random_state=99, metric='correlation'):
    """Compute a two-column UMAP embedding of ``df_cluster_params``.

    Parameters
    ----------
    df_cluster_params : pandas.DataFrame
        Rows to embed; every column is used as an input feature.
    n_neighbors, min_dist, random_state, metric
        Passed straight through to ``umap.UMAP``. The defaults for
        ``random_state`` and ``metric`` are the values that were previously
        hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` and ``y`` holding the embedding, with the same index
        labels as ``df_cluster_params``.
    """
    reducer = umap.UMAP(n_neighbors=n_neighbors, random_state=random_state,
                        min_dist=min_dist, metric=metric)
    embedding = reducer.fit_transform(df_cluster_params)
    return pd.DataFrame(data=embedding, columns=['x', 'y'],
                        index=df_cluster_params.index.tolist())
%%time
# Embed every row of df_cluster_params; min_dist=0.5 overrides the function default of 0.1
df_umap = make_umap_coordinates(df_cluster_params, n_neighbors=20, min_dist=0.5)
CPU times: user 4min 18s, sys: 7.5 s, total: 4min 25s Wall time: 2min 5s
# Quick look at the raw embedding, without any colouring by population
plt.scatter(df_umap['x'],df_umap['y'])
<matplotlib.collections.PathCollection at 0x7fe4fc6eb370>
# Inspect the UMAP coordinates (columns x and y)
df_umap
| x | y | |
|---|---|---|
| 0 | 17.127739 | 1.642288 |
| 1 | 4.517080 | 13.816017 |
| 2 | 3.990696 | 5.489047 |
| 3 | 12.313708 | -1.470701 |
| 4 | 12.205711 | 0.282254 |
| ... | ... | ... |
| 141424 | 2.026022 | 13.965105 |
| 141425 | 11.905201 | 5.680197 |
| 141426 | 6.804664 | -1.721996 |
| 141427 | -0.251455 | -3.441140 |
| 141428 | 1.568582 | 12.398086 |
141429 rows × 2 columns
#Merging df['meta_cell_live'] and df_umap (column-wise, aligned on the index)
#Any x/y columns left over from a previous run of this cell are dropped first,
#so re-running it does not append duplicate coordinate columns
df['meta_cell_live'] = pd.concat(
    [df['meta_cell_live'].drop(columns=df_umap.columns, errors='ignore'), df_umap],
    axis=1,
)
df['meta_cell_live']
| index | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | DS1000 | Tier1 | Tier2 | Tier3 | x | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live | 501 | T Cell | CD4+ Naive T Cell | CD38+ | 17.127739 | 1.642288 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live | 276 | B Cell | Memory B Cell | 4.517080 | 13.816017 | |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live | 963 | NK Cell | NK Cell CD56lo CD16hi | 3.990696 | 5.489047 | |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live | 760 | T Cell | CD4+ Central Memory T Cell | 12.313708 | -1.470701 | |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live | 304 | T Cell | CD4+ Central Memory T Cell | 12.205711 | 0.282254 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live | 771 | B Cell | Naive B Cell | 2.026022 | 13.965105 | |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live | 513 | T Cell | CD4-CD8- T Cell | 11.905201 | 5.680197 | |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live | 628 | T Cell | CD4+ Effector Memory T Cell | 6.804664 | -1.721996 | |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live | 421 | Monocyte/DC | CD14+ Monocyte | -0.251455 | -3.441140 | |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live | 361 | B Cell | Naive B Cell | 1.568582 | 12.398086 |
141429 rows × 19 columns
# Re-check the Tier2 label counts; one of the labels is the empty string
df['meta_cell_live']['Tier2'].value_counts()
CD14+ Monocyte 30296
CD4+ Central Memory T Cell 26869
Naive B Cell 24442
CD4+ Naive T Cell 12338
CD8+ Naive T Cell 8714
CD8+ Effector Memory T Cell 8465
NK Cell CD56lo CD16hi 7352
Memory B Cell 6947
CD4+ Effector Memory T Cell 4503
GD T Cell 2832
2313
NKT Cell 923
CD4-CD8- T Cell 901
cDC 866
CD8+ TEMRA 812
pDC 699
NK Cell CD56hi CD16- 666
CD14+ CD16+ Monocyte 642
CD14-CD16-CD1c-CD123- 464
CD16+ Monocyte 243
CD4+ CD8+ T Cell 140
Plasmablast 2
Name: Tier2, dtype: int64
#Investigate the blank Tier2 annotations:
#which Tier1 populations do the cells with Tier2 == '' belong to?
tier2_is_blank = df['meta_cell_live']['Tier2'] == ''
df['meta_cell_live'].loc[tier2_is_blank, 'Tier1'].value_counts()
Basophil 1868 B Cell 200 Neutrophil 152 NK Cell 93 Name: Tier1, dtype: int64
# Fill in Tier2: Basophil and Neutrophil cells reuse their Tier1 name,
# and whatever is still blank afterwards is labelled 'Undefined'
live_meta = df['meta_cell_live']
for population in ('Basophil', 'Neutrophil'):
    live_meta.loc[live_meta['Tier1'] == population, 'Tier2'] = population
live_meta.loc[live_meta['Tier2'] == '', 'Tier2'] = 'Undefined'
live_meta['Tier2'].value_counts()
CD14+ Monocyte 30296 CD4+ Central Memory T Cell 26869 Naive B Cell 24442 CD4+ Naive T Cell 12338 CD8+ Naive T Cell 8714 CD8+ Effector Memory T Cell 8465 NK Cell CD56lo CD16hi 7352 Memory B Cell 6947 CD4+ Effector Memory T Cell 4503 GD T Cell 2832 Basophil 1868 NKT Cell 923 CD4-CD8- T Cell 901 cDC 866 CD8+ TEMRA 812 pDC 699 NK Cell CD56hi CD16- 666 CD14+ CD16+ Monocyte 642 CD14-CD16-CD1c-CD123- 464 Undefined 293 CD16+ Monocyte 243 Neutrophil 152 CD4+ CD8+ T Cell 140 Plasmablast 2 Name: Tier2, dtype: int64
# NOTE(review): the notebook's imports include `from plotnine import *`, which appears to provide the ggplot/aes names used below; confirm whether this separate 'ggplot' package is actually needed
!pip install ggplot
Requirement already satisfied: ggplot in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (0.11.5) Requirement already satisfied: numpy in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (1.20.1) Requirement already satisfied: matplotlib in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (3.3.4) Requirement already satisfied: six in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (1.15.0) Requirement already satisfied: pandas in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (1.2.4) Requirement already satisfied: cycler in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (0.10.0) Requirement already satisfied: statsmodels in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (0.12.2) Requirement already satisfied: patsy>=0.4 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (0.5.1) Requirement already satisfied: brewer2mpl in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (1.4.1) Requirement already satisfied: scipy in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from ggplot) (1.6.2) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->ggplot) (2.4.7) Requirement already satisfied: kiwisolver>=1.0.1 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->ggplot) (1.3.1) Requirement already satisfied: pillow>=6.2.0 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->ggplot) (8.2.0) Requirement already satisfied: python-dateutil>=2.1 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->ggplot) (2.8.1) Requirement already satisfied: pytz>=2017.3 in /Users/daniel.geanon/opt/anaconda3/lib/python3.8/site-packages (from 
pandas->ggplot) (2021.1)
#Colour the UMAP by the clustergrammer-annotated Tier2 population
#The expression is parenthesised so no backslash continuations are needed and the
#optional save line below can be un-commented without breaking the statement
UMAP_color_by_population = (
    ggplot(df['meta_cell_live'], aes(x='x', y='y', color='Tier2'))
    + geom_point(size=0.1)
    + labs(title='Healthy Donor CyTOF UMAP Colored by Annotated Population')
    + theme_classic()
    + theme(figure_size=(8,8))
)
# UMAP_color_by_population.save(filename = 'HD_PBMC'+'.png')
UMAP_color_by_population
<ggplot: (8788857390327)>
# Inspect df['cell_live'] before it is written to CSV below
df['cell_live']
| index | 89Y_CD45 | 90Zr | 93Nb | 102Pd | 103Rh_Viability | 104Pd_Barcode | 105Pd_Barcode | 106Pd_Barcode | 108Pd_Barcode | ... | 209Bi_CD11b | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | DS1000 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 4.671640 | 0.185884 | 0.0 | 0.0 | 0.264788 | 3.881851 | 0.351883 | 0.000000 | 0.000000 | ... | 0.000000 | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | 501 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 3.358925 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.427327 | 0.422676 | 1.588577 | 0.054833 | ... | 0.000000 | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | 276 |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 3.359653 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.028588 | 1.166933 | 0.000000 | 0.664371 | ... | 3.219539 | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | 963 |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 4.524398 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.744371 | 0.929965 | 0.322028 | 0.315484 | ... | 0.000000 | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | 760 |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 3.061826 | 0.000000 | 0.0 | 0.0 | 0.072396 | 4.364128 | 0.687106 | 0.217498 | 0.000000 | ... | 1.093904 | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | 304 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 4.258232 | 0.000000 | 0.0 | 0.0 | 0.000000 | 3.731163 | 0.863907 | 0.000000 | 0.000000 | ... | 0.000000 | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | 771 |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 4.118524 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.124214 | 1.716502 | 0.190580 | 1.238342 | ... | 0.000000 | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | 513 |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 3.098846 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.930268 | 1.494842 | 0.204756 | 0.000000 | ... | 0.000000 | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | 628 |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 3.866431 | 0.000000 | 0.0 | 0.0 | 0.634893 | 4.818819 | 0.000000 | 0.098201 | 0.629816 | ... | 4.910204 | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | 421 |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 4.448786 | 0.000000 | 0.0 | 0.0 | 0.000000 | 4.381222 | 1.449402 | 0.113284 | 0.000000 | ... | 0.000000 | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | 361 |
141429 rows × 74 columns
# Inspect df['meta_cell_live'] before it is written to CSV below
df['meta_cell_live']
| index | Time | Event_length | Center | Offset | Width | Residual | bc_separation_dist | mahalanobis_dist | Event_Rate | Bead | Cell | Viability | DS1000 | Tier1 | Tier2 | Tier3 | x | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T-0_HIMC03_200716_stain_RT_Fix_4_RT | 7.422000e+00 | 20.0 | 814.091980 | 83.304001 | 49.131001 | 99.531998 | 0.604943 | 2.558261 | Pass | not-bead | y | Live | 501 | T Cell | CD4+ Naive T Cell | CD38+ | 17.127739 | 1.642288 |
| 1 | T-2_HIMC03_200716_stain_RT_Fix_4_RT | 5.216100e+01 | 31.0 | 1207.651978 | 110.832001 | 102.127998 | 140.136002 | 0.549621 | 2.763372 | Pass | not-bead | y | Live | 276 | B Cell | Memory B Cell | 4.517080 | 13.816017 | |
| 2 | T-3_HIMC03_200716_stain_RT_Fix_4_RT | 7.175800e+01 | 20.0 | 916.047974 | 94.004997 | 54.682999 | 43.006001 | 0.535007 | 2.483433 | Pass | not-bead | y | Live | 963 | NK Cell | NK Cell CD56lo CD16hi | 3.990696 | 5.489047 | |
| 3 | T-4_HIMC03_200716_stain_RT_Fix_4_RT | 1.354690e+02 | 24.0 | 1020.330994 | 90.792000 | 63.805000 | 87.530998 | 0.710983 | 0.960834 | Pass | not-bead | y | Live | 760 | T Cell | CD4+ Central Memory T Cell | 12.313708 | -1.470701 | |
| 4 | T-5_HIMC03_200716_stain_RT_Fix_4_RT | 1.394270e+02 | 42.0 | 2028.844971 | 98.490997 | 123.507004 | 186.283997 | 0.664595 | 4.623672 | Pass | not-bead | y | Live | 304 | T Cell | CD4+ Central Memory T Cell | 12.205711 | 0.282254 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141424 | T-160582_HIMC03_200716_stain_RT_Fix_4_RT | 6.162316e+06 | 38.0 | 1604.358032 | 114.376999 | 130.399994 | 199.934998 | 0.516190 | 5.822927 | Pass | not-bead | y | Live | 771 | B Cell | Naive B Cell | 2.026022 | 13.965105 | |
| 141425 | T-160583_HIMC03_200716_stain_RT_Fix_4_RT | 6.162324e+06 | 44.0 | 1909.963013 | 98.290001 | 129.996994 | 202.843994 | 0.467159 | 5.337243 | Pass | not-bead | y | Live | 513 | T Cell | CD4-CD8- T Cell | 11.905201 | 5.680197 | |
| 141426 | T-160584_HIMC03_200716_stain_RT_Fix_4_RT | 6.162336e+06 | 25.0 | 1083.738037 | 134.923004 | 97.656998 | 146.084000 | 0.665962 | 6.247370 | Pass | not-bead | y | Live | 628 | T Cell | CD4+ Effector Memory T Cell | 6.804664 | -1.721996 | |
| 141427 | T-160585_HIMC03_200716_stain_RT_Fix_4_RT | 6.162362e+06 | 24.0 | 882.460022 | 94.766998 | 67.106003 | 91.281998 | 0.762524 | 2.923641 | Pass | not-bead | y | Live | 421 | Monocyte/DC | CD14+ Monocyte | -0.251455 | -3.441140 | |
| 141428 | T-160586_HIMC03_200716_stain_RT_Fix_4_RT | 6.162367e+06 | 20.0 | 873.732971 | 93.112999 | 54.383999 | 58.018002 | 0.563013 | 2.470218 | Pass | not-bead | y | Live | 361 | B Cell | Naive B Cell | 1.568582 | 12.398086 |
141429 rows × 19 columns
# Write df['cell_live'] and df['meta_cell_live'] to CSV in one shared output folder
# NOTE: the metadata file previously ended in '.csv.csv'; the doubled extension is removed here,
# so anything that reads the old file name must be pointed at the new one
output_dir = '/Users/daniel.geanon/OneDrive - Karolinska Institutet/Mac/Desktop/Python_Projects/210602_CyTOF_HD'
df['cell_live'].to_csv(output_dir + '/CyTOF_HD_cell_v2.csv')
df['meta_cell_live'].to_csv(output_dir + '/CyTOF_HD_meta_cell_v2.csv')
import os
# Export the notebook 1_HD_file_clustering_downsample to HTML with nbconvert
!jupyter nbconvert 1_HD_file_clustering_downsample --to html